Scripts to run CAVIAR, FINEMAP (version 1.1) and DAP-G

Nalls et. al. (2019) w/ 23andMe

Statistical Fine-mapping

Parkinson’s Disease GWAS.

dataset_name <- "Nalls23andMe_2019"
top_SNPs <- Nalls_top_SNPs <- import_topSNPs(
  topSS_path = Directory_info(Data_dirs, dataset_name, "topSumStats"),
  chrom_col = "CHR", position_col = "BP", snp_col="SNP",
  pval_col="P, all studies", effect_col="Beta, all studies", gene_col="Nearest Gene",
  caption= "Nalls et al. (2018) w/ 23andMe PD GWAS Summary Stats",
  group_by_locus = T, 
  locus_col = "Locus Number",
  remove_variants = "rs34637584")
# Manually add new SNP of interest
top_SNPs <- data.table::rbindlist(list(top_SNPs, 
                                      data.table::data.table(CHR=14,POS=55360203, SNP="rs3783642", 
                                                   P=0, Effect=1, Gene="ATG14", Locus=NA),
                                      data.table::data.table(CHR=12,POS=53797236, SNP="rs34656641",
                                                   P=0, Effect=1, Gene="SP1", Locus=NA),
                                      data.table::data.table(CHR=5,POS=126181862, SNP="rs959573",
                                                   P=0, Effect=1, Gene="LMNB1", Locus=NA),
                                      data.table::data.table(CHR=17,POS=40609405, SNP="rs9897702",
                                                   P=0, Effect=1, Gene="ATP6V0A1", Locus=NA)
                                      ))

gene_list <- unique(top_SNPs$Gene)

finemap_results[[dataset_name]] <- finemap_gene_list(top_SNPs,
                                               gene_list = unique(c("LRRK2", gene_list))[1:2],
                                               trim_gene_limits = c(T, rep(F,length(gene_list))),
                                               dataset_name = dataset_name,
                                               dataset_type = "GWAS",
                                               query_by ="coordinates",
                                               subset_path = "auto",
                                               finemap_method = c("SUSIE","FINEMAP"),
                                               #,"ABF","COJO"), 
                                               force_new_LD = F,  
                                               force_new_subset = F,
                                               force_new_finemap = F,
                 # file_path = Directory_info(dataset_name, "fullSumStats"),
                 fullSS_path = "./Data/GWAS/Nalls23andMe_2019/nallsEtAl2019_allSamples_allVariants.mod.txt",
                 chrom_col = "CHR", position_col = "POS", snp_col = "RSID",
                 pval_col = "p", effect_col = "beta", stderr_col = "se", 
                 freq_col = "freq", MAF_col = "calculate",
                 A1_col = "A1",
                 A2_col = "A2",
                 bp_distance = 500000*2,
                 download_reference = T, 
                 LD_reference = "1KG_Phase1",
                 superpopulation = "EUR", 
                 LD_block = F, 
                 min_MAF = 0,
                 remove_variants = c("rs34637584"),
                 remove_correlates = c("rs34637584"),
                 conditioned_snps = "auto", # Use the lead SNP for each gene 
                 n_causal = 5, 
                 remove_tmps = T)
## ^^^^^^^^^ Running echolocatoR on: LRRK2 ^^^^^^^^^

LRRK2

[1] "" [1] “—————— Step 1: Query —————” [1] “+ Importing previous Multi-finemap file…Importing summary stats.” [1] “Subset file looks good! :)” [1] “+ Removing subset tmp…” [1] “Removing specified variants: rs34637584” [1] "" [1] “— Step 2: Calculate Linkage Disequilibrium —” [1] “+ Previously computed LD matrix detected. Importing… Data/GWAS/Nalls23andMe_2019/LRRK2/plink/LD_matrix.RData” [1] "" [1] “————– Step 3: Filter SNPs ————-” [1] “^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^——– Step 4: Statistically Fine-map ——–^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^” [1] “+++ Previously multi-fine-mapped results identified. Importing…” [1] “+ Fine-mapping with ‘SUSIE, FINEMAP’ completed in 0 seconds.” [1] "" [1] “————— Step 7: Visualize ————–” [1] “Plotting… original” [1] “+ Constructing SNP labels…” [1] “Plotting… SUSIE” [1] “+ Constructing SNP labels…” [1] “Plotting… FINEMAP” [1] “+ Constructing SNP labels…” [1] “+ LRRK2 Credible Set SNPs”

[1] “Generating summary table…” [1] “LRRK2 fine-mapped in 19.52 seconds”

## ^^^^^^^^^ Running echolocatoR on: ASXL3 ^^^^^^^^^

ASXL3

[1] "" [1] “—————— Step 1: Query —————” [1] “+ Importing previous Multi-finemap file…Importing summary stats.” [1] “Subset file looks good! :)” [1] “+ Removing subset tmp…” [1] “Removing specified variants: rs34637584” [1] "" [1] “— Step 2: Calculate Linkage Disequilibrium —” [1] “+ Previously computed LD matrix detected. Importing… Data/GWAS/Nalls23andMe_2019/ASXL3/plink/LD_matrix.RData” [1] "" [1] “————– Step 3: Filter SNPs ————-” [1] “^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^——– Step 4: Statistically Fine-map ——–^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^” [1] “+++ Previously multi-fine-mapped results identified. Importing…” [1] “+ Fine-mapping with ‘SUSIE, FINEMAP’ completed in 0.01 seconds.” [1] "" [1] “————— Step 7: Visualize ————–” [1] “Plotting… original” [1] “+ Constructing SNP labels…” [1] “Plotting… SUSIE” [1] “+ Constructing SNP labels…” [1] “Plotting… FINEMAP” [1] “+ Constructing SNP labels…” [1] “+ ASXL3 Credible Set SNPs”

[1] “Generating summary table…” [1] “ASXL3 fine-mapped in 3.76 seconds”

Summarize Fine-mapping Results

Gather Annotations

## [1] "++ Submitting chunk 1 / 1"
## Note: zip::zip() is deprecated, please use zip::zipr() instead

Epigenetics

NOTE: Must run ‘merge_finemapping_results’ with haploreg_annotation=T for this to work.

##                        Hits Total_SNPs Percent_Total
## Promoter_histone_marks 215  1297       16.58        
## Enhancer_histone_marks 548  1297       42.25
## [1] "Conducting SNP epigenomic annotation enrichment tests..."
## [1] "+++ SNP list 1 :"
## [1] "+ Gathering annotation data from HaploReg..."
## [1] "++ Submitting chunk 1 / 1"
##                        Hits Total_SNPs Percent_Total
## Promoter_histone_marks 3    6          50           
## Enhancer_histone_marks 4    6          66.67        
## [1] "+++ SNP list 2 :"
## [1] "+ Gathering annotation data from HaploReg..."
## [1] "++ Submitting chunk 1 / 1"
##                        Hits Total_SNPs Percent_Total
## Promoter_histone_marks 48   560        8.57         
## Enhancer_histone_marks 141  560        25.18        
## [1] "++ Testing for enrichment of ' Promoter_histone_marks ' in the tissues ' BRN & BLD '"
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  cont_tab
## X-squared = 7.5689, df = NA, p-value = 0.03198
## 
## 
##  Fisher's Exact Test for Count Data
## 
## data:  cont_tab
## p-value = 0.03134
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   0.9106592 28.2085725
## sample estimates:
## odds ratio 
##   5.801191 
## 
## [1] "++ Testing for enrichment of ' Enhancer_histone_marks ' in the tissues ' BRN & BLD '"
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  cont_tab
## X-squared = 2.4016, df = NA, p-value = 0.2429
## 
## 
##  Fisher's Exact Test for Count Data
## 
## data:  cont_tab
## p-value = 0.1264
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   0.5411402 11.3177243
## sample estimates:
## odds ratio 
##   2.643166

Mutation Types

## [1] "\n + All 2 models converged upon 1 to 9 SNPs in 56.63 % of loci."

eQTL Boxplots: Fairfax

## [1] ""
## [1] "+ Processsing Expression data"
## [1] "++ Extracting probe info"
## [1] "++ CD14"
## [1] "++ IFN"
## [1] "++ LPS2"
## [1] "++ LPS24"
## [1] ""
## [1] "+ Processing Summary Stats data"
## [1] ""
## [1] "+ Processing Genotype data"
## Warning in melt.data.table(., geno_subset, id.vars = c("CHR", "SNP",
## "POS", : 'measure.vars' [289, 290, 292, 293, ...] are not all of the same
## type. By order of hierarchy, the molten data value column will be of type
## 'double'. All measure variables not of type 'double' will be coerced too.
## Check DETAILS in ?melt.data.table for more on coercion.
## [1] ""
## [1] "+ Merging Summary Stats, Genotype, and Expression data"
## [1] ""
## [1] "+ Plotting eQTLs"

Colocalization: MESA

subpops <- c("CAU","AFA","HIS")
dataset2_pathList <- paste0("./Data/eQTL/MESA_",subpops,"/LRRK2_",subpops,"_MESA.txt")

colocalize_geneList(gene_list = c("LRRK2"), 
                    dataset1_path = "./Data/GWAS/Nalls23andMe_2019/LRRK2/Multi-finemap/Multi-finemap_results.txt",
                    dataset2_pathList = dataset2_pathList,
                    dataset1_type = "cc",
                    dataset2_type = "quant",
                    shared_MAF = 1, 
                    PP_threshold = .8, 
                    save_results = T, 
                    show_plot = T)

colocalize_geneList <- function(gene_list, 
                                dataset1_path, 
                                dataset2_pathList, 
                                dataset1_type="cc",
                                dataset2_type = "quant",
                                shared_MAF=1,
                                PP_threshold=0.8,
                                save_results=T,
                                show_plot=T,
                                chrom_col = "chr", 
                                position_col = "pos_snps", 
                                snp_col="snps",
                                pval_col="pvalue", 
                                effect_col="beta", 
                                gene_col="gene_name",
                                stderr_col = "calculate",  
                                tstat_col = "statistic"){
  for(g in gene_list){
    cat('\n')
    cat("###", g, "\n") 
    for(d2 in dataset2_pathList){
      comparison_name <- paste0(basename(dataset1_path),".vs.",basename(d2))  
      subset_DT <- extract_SNP_subset(gene = gene, 
                    top_SNPs = top_SNPs, 
                    fullSS_path = fullSS_path,
                    subset_path  =  subset_path,
                    force_new_subset = force_new_subset,
                    
                    chrom_col = chrom_col, 
                    position_col = position_col, 
                    snp_col = snp_col,
                    pval_col = pval_col, 
                    effect_col = effect_col, 
                    stderr_col = stderr_col,
                    gene_col = gene_col, 
                    tstat_col = tstat_col,
                    MAF_col = MAF_col,
                    freq_col = freq_col,
                    A1_col = A1_col,
                    A2_col = A2_col,
                    
                    bp_distance = bp_distance,
                    superpopulation = superpopulation,  
                    min_POS = min_POS, 
                    max_POS = max_POS, 
                    file_sep = file_sep, 
                    query_by = query_by,
                    probe_path = probe_path
                    ) 
      
      coloc_DT <- COLOC(
          gene = g,
          dataset1_path = dataset1_path, 
          dataset2_path = d2,
          dataset1_type = dataset1_type,
          dataset2_type = dataset2_type,
          shared_MAF = shared_MAF,
          PP_threshold = PP_threshold, 
          save_results = save_results,
          show_plot = show_plot, 
          )
      coloc_DT <- cbind(Comparison = comparison_name)
      coloc_results[[dataset_name]]
    } 
  cat("\n")
  } 
  return(coloc_results %>% data.table::rbindlist())
}

Colocalization: Fairfax

Summarize

## [1] "+ Gathering Fairfax eQTLS for LRRK2"
## [1] "+ CD14"
## [1] "+ IFN"
## [1] "+ LPS2"
## [1] "+ LPS24"

Fine-mapping eQTLs

  • AFR = AFA = African
  • AMR = Ad Mixed American
  • EAS = East Asian
  • EUR = CAU = European
  • SAS = South Asian
  • HIS = Hispanic

Session Information

## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] susieR_0.8.1.0521  GeneOverlap_1.20.0 haploR_3.0.1      
##  [4] XGR_1.1.5          dnet_1.1.4         supraHex_1.22.0   
##  [7] hexbin_1.27.3      igraph_1.2.4.1     coloc_3.2-1       
## [10] snpStats_1.34.0    Matrix_1.2-17      survival_2.44-1.1 
## [13] biomaRt_2.40.0     BiocManager_1.30.4 tidyr_0.8.3       
## [16] gaston_1.5.5       RcppParallel_4.4.3 Rcpp_1.0.1        
## [19] curl_3.3           ggrepel_0.8.1      cowplot_0.9.4     
## [22] plotly_4.9.0       ggplot2_3.2.0      dplyr_0.8.1       
## [25] data.table_1.12.2  readxl_1.3.1       usethis_1.5.0     
## [28] devtools_2.0.2     R.utils_2.9.0      R.oo_1.22.0       
## [31] R.methodsS3_1.7.1 
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.1.4        RCircos_1.2.1          plyr_1.8.4            
##   [4] lazyeval_0.2.2         splines_3.6.0          crosstalk_1.0.0       
##   [7] wavethresh_4.6.8       GenomeInfoDb_1.20.0    ggnetwork_0.5.1       
##  [10] inline_0.3.15          digest_0.6.19          htmltools_0.3.6       
##  [13] gdata_2.18.0           magrittr_1.5           memoise_1.1.0         
##  [16] cluster_2.1.0          openxlsx_4.1.0.1       remotes_2.1.0         
##  [19] sna_2.4                prettyunits_1.0.2      colorspace_1.4-1      
##  [22] blob_1.1.1             rrcov_1.4-7            xfun_0.8              
##  [25] callr_3.2.0            crayon_1.3.4           RCurl_1.95-4.12       
##  [28] jsonlite_1.6           graph_1.62.0           ape_5.3               
##  [31] glue_1.3.1             gtable_0.3.0           zlibbioc_1.30.0       
##  [34] XVector_0.24.0         pkgbuild_1.0.3         Rgraphviz_2.28.0      
##  [37] BiocGenerics_0.30.0    DEoptimR_1.0-8         scales_1.0.0          
##  [40] mvtnorm_1.0-11         DBI_1.0.0              viridisLite_0.3.0     
##  [43] xtable_1.8-4           progress_1.2.2         bit_1.1-14            
##  [46] stats4_3.6.0           DT_0.7                 htmlwidgets_1.3       
##  [49] httr_1.4.0             gplots_3.0.1.1         RColorBrewer_1.1-2    
##  [52] pkgconfig_2.0.2        reshape_0.8.8          XML_3.98-1.20         
##  [55] reshape2_1.4.3         tidyselect_0.2.5       labeling_0.3          
##  [58] rlang_0.4.0            later_0.8.0            AnnotationDbi_1.46.0  
##  [61] munsell_0.5.0          cellranger_1.1.0       tools_3.6.0           
##  [64] cli_1.1.0              RSQLite_2.1.1          statnet.common_4.3.0  
##  [67] evaluate_0.14          stringr_1.4.0          yaml_2.2.0            
##  [70] processx_3.3.1         knitr_1.23             bit64_0.9-7           
##  [73] fs_1.3.1               zip_2.0.2              robustbase_0.93-5     
##  [76] caTools_1.17.1.2       purrr_0.3.2            nlme_3.1-140          
##  [79] mime_0.7               leaps_3.0              compiler_3.6.0        
##  [82] tibble_2.1.3           pcaPP_1.9-73           stringi_1.4.3         
##  [85] ps_1.3.0               desc_1.2.0             lattice_0.20-38       
##  [88] pillar_1.4.1           RUnit_0.4.32           bitops_1.0-6          
##  [91] httpuv_1.5.1           GenomicRanges_1.36.0   R6_2.4.0              
##  [94] promises_1.0.1         network_1.15           KernSmooth_2.23-15    
##  [97] IRanges_2.18.1         sessioninfo_1.1.1      MASS_7.3-51.4         
## [100] gtools_3.8.1           assertthat_0.2.1       pkgload_1.0.2         
## [103] BMA_3.18.9             rprojroot_1.3-2        withr_2.1.2           
## [106] S4Vectors_0.22.0       GenomeInfoDbData_1.2.1 parallel_3.6.0        
## [109] hms_0.4.2              grid_3.6.0             coda_0.19-2           
## [112] rmarkdown_1.13         Biobase_2.44.0         shiny_1.3.2
## [1] "susieR  0.8.1.521"